##################################################################
# Responsive Rhetoric: Evidence from Congressional Redistricting #
# Jaclyn Kaslovsky and Michael Kistner
# Appendix Code
##################################################################

###############################################
# Load Packages and Set the Working Directory #
###############################################

# Load libraries
library(broom)
library(cowplot)
library(fixest)
library(ggpubr)
library(haven)
library(lubridate)
library(magrittr)
library(modelsummary)
library(stargazer)
library(tidyverse)
library(tictoc)
library(tidyverse)
library(quanteda)
library(quanteda.textmodels)
library(quanteda.textstats)

#setwd("~/Dropbox/Redistricting, Ideology, and Communication/")

# Set modelsummary's LaTeX numeric-format option to "plain"
options(modelsummary_format_numeric_latex = "plain")

####################
# Load in the data #
####################

# Restore the saved objects from each replication data file into the current
# environment (presumably prepost_df, member_tweets and cces_df, which the
# code below relies on - confirm against the .Rda contents).
replication_data_files <- c(
  "Replication Files/Data/District and Member Changes (116th and 117th).Rda",
  "Replication Files/Data/Tweets by Members of Congress (116th and 117th).Rda",
  "Replication Files/Data/CES Perceptions.RDa"
)
for (rda_path in replication_data_files) {
  load(rda_path)
}

####################
# Figure A.1.      #
####################

### Graphs of Cook PVI Change -------------------------------------------------
# Fix the RNG state so the random jitter below (and therefore the figure) is
# identical on every run. The seed value itself is arbitrary.
set.seed(20220101)

# Prepare data for graphing (creating facet labels)
#   Party       - facet label; every value other than "D" is labelled Republican
#   change      - 0 when CookPVI_117 equals CookPVI_116, 1 otherwise; computed
#                 before CookPVI_116 is jittered on the last line
#   jitter      - uniform horizontal offset in [-0.3, 0.3], one draw per row
#   CookPVI_116 - original value plus that offset (used as the x position)
graphing_data <- prepost_df %>%
  mutate(Party = ifelse(Party == "D", "Change for Democrats", "Change for Republicans"),
         GraphType = "Histogram of Cook PVI Changes (House Districts)",
         change = ifelse(CookPVI_117 == CookPVI_116, 0, 1),
         jitter = runif(n(), -0.3, 0.3),
         CookPVI_116 = CookPVI_116 + jitter)

# Plot change sizes and locations for Dems. vs. Republicans:
# one point per row at the jittered 2020 Cook PVI (vertically jittered around
# y = 0), plus, for rows with change == 1, a vertical arrow from y = 0 to
# CookPVI_Change.
change_arrowplot <- graphing_data %>%
  ggplot() +
  geom_point(aes(x = CookPVI_116, y = 0), alpha = 0.5,
             # seeded so the vertical jitter is reproducible as well
             position = position_jitter(height = 0.3, width = 0.0,
                                        seed = 20220101)) +
  geom_segment(data = filter(graphing_data, change == 1),
               aes(group = ICPSR,
                   x = CookPVI_116,
                   y = 0,
                   xend = CookPVI_116,
                   yend = CookPVI_Change),
               arrow = arrow(length = unit(0.2, "cm")),
               alpha = 0.5) +
  facet_wrap(~Party, scales = "free_x") +
  labs(x = "Cook PVI (2020 District)",
       y = "Change in Cook PVI (2020 to 2022)") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank()
  )

change_arrowplot

####################
# Figure B.1.      #
####################

# Collapse the tweet-level data to one row per MemberICPSR: keep the first
# name/party seen, average the two NOMINATE dimensions (ignoring NAs) and
# average ProbR into TweetSlant.
graphing_data <- member_tweets %>%
  group_by(MemberICPSR) %>%
  summarise(
    across(c(MemberName, MemberParty), first),
    across(c(NOMDim1, NOMDim2), ~ mean(.x, na.rm = TRUE)),
    TweetSlant = mean(ProbR)
  )

# Scatterplot of member-level tweet slant against first-dimension NOMINATE,
# with a per-party linear fit and per-party correlation annotation.
ggplot(
  data = graphing_data,
  mapping = aes(x = NOMDim1, y = TweetSlant, colour = MemberParty)
) +
  geom_smooth(method = "lm", se = FALSE) +
  ggpubr::stat_cor(p.accuracy = 0.001, show.legend = FALSE) +
  geom_point(shape = 1) +
  # Colours are assigned by position, i.e. in the order of MemberParty's levels
  scale_colour_manual(values = c("Navy", "DarkRed")) +
  labs(
    colour = "Party",
    x = "DW-NOMINATE Score \n (1st Dimension)",
    y = "Tweet Ideology \n (Class Affinity Model)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom")

####################
# Figure C.1.      #
####################

### Creating Word Usage Figure -------------------------------------------------
# Create document corpus from tweets.
#   TweetText      - TextOriginal with every "/" and "-" replaced by a space
#   MemberSession  - "<MemberICPSR>-<Session>" key identifying a member-session
#   DocID          - copy of TweetID kept as a document variable
#   PartisanDecile - 1-10 bin of ProbR in steps of 0.1; the final cut-off is
#                    above 1 so that ProbR == 1 still falls in bin 10
#   TweetCount     - number of tweets in the row's MemberSession group
# The corpus text is TweetText and the document ids are taken from TweetID.
tweet_corpus <- member_tweets %>%
  mutate(TweetText = str_replace_all(TextOriginal, "[/-]", " "), # extract words from URLs
         # `sep` must be named: an unnamed "-" is treated as a third string to
         # paste on the end (giving e.g. "<id><session>-"), not as a separator
         MemberSession = str_c(MemberICPSR, Session, sep = "-"),
         DocID = TweetID,
         PartisanDecile = case_when(
           ProbR < 0.1 ~ 1,
           ProbR < 0.2 ~ 2,
           ProbR < 0.3 ~ 3,
           ProbR < 0.4 ~ 4,
           ProbR < 0.5 ~ 5,
           ProbR < 0.6 ~ 6,
           ProbR < 0.7 ~ 7,
           ProbR < 0.8 ~ 8,
           ProbR < 0.9 ~ 9,
           ProbR < 1.10 ~ 10
         )) %>%
  select(DocID, TweetID, TweetText, ProbR, PartisanDecile, Date, Session, MemberName, MemberICPSR,
         MemberSession, MemberParty, RetweetCount, ReplyCount, LikeCount, QuoteCount) %>%
  group_by(MemberSession) %>%
  mutate(TweetCount = n()) %>%
  ungroup() %>%
  corpus(text_field = "TweetText",
         docid_field = "TweetID")

# Turn the corpus into a bigram document-feature matrix. Steps, in order:
# tokenize (dropping punctuation and symbols, keeping numbers, URLs, hyphenated
# words and tags intact), lowercase, keep tokens of at least 2 characters, drop
# English stopwords, stem, form 2-grams, build the dfm, and keep only features
# appearing in at least 0.1% of documents.
tweet_dfm <- tweet_corpus %>%
  tokens(
    remove_punct = TRUE,
    remove_symbols = TRUE,
    remove_numbers = FALSE,
    remove_url = FALSE,
    split_hyphens = FALSE,
    split_tags = FALSE
  ) %>%
  tokens_tolower() %>%
  tokens_select(min_nchar = 2L) %>%
  tokens_remove(pattern = stopwords("en")) %>%
  tokens_wordstem(language = quanteda_options("language_stemmer")) %>%
  tokens_ngrams(n = 2L) %>%
  dfm() %>%
  dfm_trim(min_docfreq = 0.001, docfreq_type = "prop")

# Helper: top keyness features for one group of partisan deciles.
#
# Arguments:
#   dfm_obj        - document-feature matrix with a PartisanDecile docvar
#   deciles        - PartisanDecile values that make up the target group
#   quintile_label - string stored in the `quintile` column of the result
#   n_top          - number of features to keep (default 15)
#
# Returns the textstat_keyness() output (likelihood-ratio measure, sorted) for
# target-vs-rest, after dropping features that contain "com", "status",
# "https" or "@", truncated to the first `n_top` rows, with a `position`
# column (row number after filtering, as a string) and a `quintile` column.
#
# NOTE(review): the filters are substring matches, so "com" also removes any
# feature that merely contains those letters - confirm this is intended.
top_keyness_features <- function(dfm_obj, deciles, quintile_label, n_top = 15L) {
  # %in% never yields NA, so documents with a missing decile count as non-target
  in_target <- docvars(dfm_obj)$PartisanDecile %in% deciles

  textstat_keyness(dfm_obj,
                   target = in_target,
                   measure = "lr",
                   sort = TRUE) %>%
    filter(!(str_detect(feature, "com")),
           !(str_detect(feature, "status")),
           !(str_detect(feature, "https")),
           !(str_detect(feature, "@"))) %>%
    slice(seq_len(n_top)) %>%
    rownames_to_column(var = "position") %>%
    mutate(quintile = quintile_label)
}

# One column of the figure per quintile; each quintile is a pair of deciles
quintile_1 <- top_keyness_features(tweet_dfm, deciles = 1:2, quintile_label = "1")
quintile_2 <- top_keyness_features(tweet_dfm, deciles = 3:4, quintile_label = "2")
quintile_3 <- top_keyness_features(tweet_dfm, deciles = 5:6, quintile_label = "3")
quintile_4 <- top_keyness_features(tweet_dfm, deciles = 7:8, quintile_label = "4")
quintile_5 <- top_keyness_features(tweet_dfm, deciles = 9:10, quintile_label = "5")

# Stack the five per-quintile tables and make `position` numeric so it can be
# used as the (reversed) y coordinate.
graphing_data <- list(quintile_1, quintile_2, quintile_3,
                      quintile_4, quintile_5) %>%
  bind_rows() %>%
  mutate(position = as.integer(position))

# Column headers for the five quintiles, in order
quintile_axis_labels <- c("Far Left \n (0.0 - 0.2)",
                          "Lean Left \n (0.2 - 0.4)",
                          "Moderate \n (0.4 - 0.6)",
                          "Lean Right \n (0.6 - 0.8)",
                          "Far Right \n (0.8 - 1.0)")

# Text-only plot: one column per quintile, features listed top-down by
# position, with a double-headed arrow drawn across the top at y = 0.
ggplot(graphing_data, aes(x = quintile, y = position)) +
  geom_text(aes(label = feature)) +
  geom_segment(aes(x = 0.5, xend = 5.5, y = 0, yend = 0), arrow = arrow(ends = "both")) +
  scale_x_discrete(name = "Tweet Partisanship Score",
                   labels = quintile_axis_labels,
                   position = "top") +
  scale_y_reverse() +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        panel.grid = element_blank())

####################
# Table D.1.      #
####################

# Effect of district ideology change on tweet ideology change.
# All three models use rows with RanForReelection == 1 and
# heteroskedasticity-robust standard errors (vcov = "hetero").

# Pooled sample, with a party indicator
model_1 <- feols(TweetExtremism_Change ~ DistIdeol_Change + Party,
                 vcov = "hetero",
                 data = filter(prepost_df, RanForReelection == 1))

# Democrats only
model_1d <- feols(TweetExtremism_Change ~ DistIdeol_Change,
                  vcov = "hetero",
                  data = filter(prepost_df, Party == "D", RanForReelection == 1))

# Republicans only
model_1r <- feols(TweetExtremism_Change ~ DistIdeol_Change,
                  vcov = "hetero",
                  data = filter(prepost_df, Party == "R", RanForReelection == 1))

# Create table of results: estimates with significance stars, standard errors
# in parentheses, only the mapped coefficients, N and adjusted R-squared.
# `notes` is spelled out in full; the abbreviated `note` only reached this
# argument through R's partial argument matching.
modelsummary(list("Pooled" = model_1,
                  "Democrats" = model_1d,
                  "Republicans" = model_1r),
             fmt = 3,
             stars = c('+' = .10,
                       '*' = .05,
                       '**' = .01),
             statistic = "({std.error})",
             estimate  = "{estimate}{stars}",
             coef_map = c("DistIdeol_Change" = "District Ideology Change",
                          "PartyR" = "Republican"),
             gof_map = c("nobs", "adj.r.squared"),
             notes = "Robust standard errors shown in parentheses. +p<0.10, *p<0.05; **p<0.01")

####################
# Figure E.1.      #
####################
# Estimate effect separately by redistricting control variable.
# "Nonpartisan Redistricting" is made the reference level, so the coefficient
# on CookFavorability_Change is the slope for that group and each interaction
# coefficient is another group's difference from it.
model_redist <- filter(prepost_df, RanForReelection == 1) %>%
  mutate(RedistrictingControl = relevel(as.factor(RedistrictingControl),
                                        "Nonpartisan Redistricting")) %>%
  feols(TweetExtremism_Change ~ CookFavorability_Change*RedistrictingControl + Party,
        vcov = "hetero",
        data = .)

# Coefficient positions used below: 2 = CookFavorability_Change (reference
# slope), 7:9 = its interactions with the other RedistrictingControl levels.
# NOTE(review): these positions and the label order further down assume four
# RedistrictingControl levels, a two-level Party, and interactions ordered
# Divided / Other Party / Own Party - confirm with names(coef(model_redist)).
slope_idx <- 2
interaction_idx <- 7:9

redist_coef <- coef(model_redist)
redist_vcov <- vcov(model_redist, vcov = "hetero")

# Group-specific slope = reference slope + interaction. Its variance is
# Var(b_slope) + Var(b_int) + 2 * Cov(b_slope, b_int); using the interaction's
# standard error alone would describe the *difference* from the reference
# group, not the group's own slope.
group_estimate <- c(redist_coef[slope_idx],
                    redist_coef[slope_idx] + redist_coef[interaction_idx])
group_se <- sqrt(c(
  redist_vcov[slope_idx, slope_idx],
  redist_vcov[slope_idx, slope_idx] +
    diag(redist_vcov)[interaction_idx] +
    2 * redist_vcov[slope_idx, interaction_idx]
))

# Put estimates, SEs and 95% intervals into dataframe for graphing
graphing_data <- data.frame(
  estimate = unname(group_estimate),
  se = unname(group_se),
  redistricting = c("Nonpartisan \n Redistricting", "Divided Party \n Control",
                    "Other Party \n Controls", "Own Party \n Controls")) %>%
  mutate(redistricting = factor(redistricting,
                                levels = c("Divided Party \n Control",
                                           "Other Party \n Controls",
                                           "Own Party \n Controls",
                                           "Nonpartisan \n Redistricting")),
         lower = estimate - 1.96 * se,
         upper = estimate + 1.96 * se)

# Coefficient plot: one point per redistricting regime with a horizontal
# interval from `lower` to `upper`, a solid reference line at 0 and a dashed
# dark-red line at 0.001.
# NOTE(review): the 0.001 reference value is not derived anywhere in this
# file - confirm what it represents.
ggplot(data = graphing_data, mapping = aes(x = estimate, y = redistricting)) +
  geom_errorbarh(mapping = aes(xmin = lower, xmax = upper), height = 0) +
  geom_point(size = 3, shape = 21, fill = "white") +
  geom_vline(aes(xintercept = 0), linetype = "solid") +
  geom_vline(aes(xintercept = 0.001), linetype = "dashed", colour = "darkred") +
  labs(y = NULL, x = "Estimated Effect of Redistricting Change") +
  theme_bw() +
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor = element_blank())

####################
# Table F.1.      #
####################
# Effect of competition change on tweet extremism change.
# All three models use rows with RanForReelection == 1 and
# heteroskedasticity-robust standard errors (vcov = "hetero").

# Pooled sample, with a party indicator
model_2 <- feols(TweetExtremism_Change ~ PreviouslyCompetitive + NewlyCompetitive + Party,
                 vcov = "hetero",
                 data = filter(prepost_df, RanForReelection == 1))

# Democrats only
model_2d <- feols(TweetExtremism_Change ~ PreviouslyCompetitive + NewlyCompetitive,
                  vcov = "hetero",
                  data = filter(prepost_df, Party == "D", RanForReelection == 1))

# Republicans only
model_2r <- feols(TweetExtremism_Change ~ PreviouslyCompetitive + NewlyCompetitive,
                  vcov = "hetero",
                  data = filter(prepost_df, Party == "R", RanForReelection == 1))

# Create table of results.
# coef_map lists only right-hand-side terms of the models above; the previous
# "TweetExtremismChange" entry named the outcome (and without its underscore),
# so it could never match a coefficient and has been dropped.
# `notes` is spelled out in full rather than relying on partial matching.
modelsummary(list("Pooled" = model_2,
                  "Democrats" = model_2d,
                  "Republicans" = model_2r),
             fmt = 3,
             stars = c('+' = .10,
                       '*' = .05,
                       '**' = .01),
             statistic = "({std.error})",
             estimate  = "{estimate}{stars}",
             coef_map = c("PreviouslyCompetitive" = "Newly Safe District",
                          "NewlyCompetitive" = "Newly Competitive District",
                          "PartyR" = "Republican"),
             gof_map = c("nobs", "adj.r.squared"),
             notes = "Robust standard errors shown in parentheses. +p<0.10, *p<0.05; **p<0.01")

####################
# Table G.1.      #
####################

### Regression Models of Retweet Changes (Supplemental) ------------------------
# Same specifications as the tweet models, with RetweetExtremism_Change as the
# outcome. Every model uses rows with RanForReelection == 1 and
# heteroskedasticity-robust standard errors. Note that model_1* and model_2*
# overwrite any objects of the same name created earlier in the script.

# Effect of Cook PVI change on retweet change (pooled / Democrats / Republicans)
model_1 <- feols(RetweetExtremism_Change ~ CookFavorability_Change + Party,
                 vcov = "hetero",
                 data = filter(prepost_df, RanForReelection == 1))

model_1d <- feols(RetweetExtremism_Change ~ CookFavorability_Change,
                  vcov = "hetero",
                  data = filter(prepost_df, Party == "D", RanForReelection == 1))

model_1r <- feols(RetweetExtremism_Change ~ CookFavorability_Change,
                  vcov = "hetero",
                  data = filter(prepost_df, Party == "R", RanForReelection == 1))

# Effect of competition change on retweet change (pooled / Democrats / Republicans)
model_2 <- feols(RetweetExtremism_Change ~ PreviouslyCompetitive + NewlyCompetitive + Party,
                 vcov = "hetero",
                 data = filter(prepost_df, RanForReelection == 1))

model_2d <- feols(RetweetExtremism_Change ~ PreviouslyCompetitive + NewlyCompetitive,
                  vcov = "hetero",
                  data = filter(prepost_df, Party == "D", RanForReelection == 1))

model_2r <- feols(RetweetExtremism_Change ~ PreviouslyCompetitive + NewlyCompetitive,
                  vcov = "hetero",
                  data = filter(prepost_df, Party == "R", RanForReelection == 1))

# Create table of results: columns 1-3 are the Cook PVI models, columns 4-6
# the competition models. `notes` is spelled out in full rather than relying
# on partial argument matching.
modelsummary(list("Pooled" = model_1,
                  "Democrats" = model_1d,
                  "Republicans" = model_1r,
                  "Pooled" = model_2,
                  "Democrats" = model_2d,
                  "Republicans" = model_2r),
             fmt = 4,
             stars = c('+' = .10,
                       '*' = .05,
                       '**' = .01),
             statistic = "({std.error})",
             estimate  = "{estimate}{stars}",
             coef_map = c("CookFavorability_Change" = "Change in Cook PVI",
                          "PreviouslyCompetitive" = "Newly Safe District",
                          "NewlyCompetitive" = "Newly Competitive District",
                          "PartyR" = "Republican"),
             gof_map = c("nobs", "adj.r.squared"),
             notes = "Robust standard errors shown in parentheses. +p<0.10, *p<0.05; **p<0.01")

####################
# Table H.1.      #
####################

# Roll-call models. Every model uses rows with RanForReelection == 1 and
# heteroskedasticity-robust standard errors (vcov = "hetero").

# Effect of Cook PVI change on roll call ideology change
model_1 <- feols(VoteExtremism_Change ~ CookFavorability_Change + Party,
                 vcov = "hetero",
                 data = filter(prepost_df, RanForReelection == 1))

# Effect of Cook PVI change on roll call ideology change (Democrats)
model_2 <- feols(VoteExtremism_Change ~ CookFavorability_Change,
                 vcov = "hetero",
                 data = filter(prepost_df, Party == "D", RanForReelection == 1))

# Effect of Cook PVI change on roll call ideology change (Republicans)
model_3 <-  feols(VoteExtremism_Change ~ CookFavorability_Change,
                  vcov = "hetero",
                  data = filter(prepost_df, Party == "R", RanForReelection == 1))

# Effect of 21/22 Cook PVI on roll call ideology in the 117th
model_4 <- feols(VoteExtremism_117 ~ CookFavorability_116 +
                   CookFavorability_117 + Party,
                 vcov = "hetero",
                 data = filter(prepost_df, RanForReelection == 1))

# Effect of 21/22 Cook PVI on roll call ideology in the 116th
# NOTE(review): unlike model_4, this sample also drops rows with a missing
# TweetExtremism_117 - confirm the tweet (rather than vote) variable is the
# intended restriction.
model_5 <- feols(VoteExtremism_116 ~ CookFavorability_116 +
                   CookFavorability_117 + Party,
                 vcov = "hetero",
                 data = filter(prepost_df,
                               RanForReelection == 1,
                               !(is.na(TweetExtremism_117))))

# Calculate ratio (accountability % of total effect):
# third coefficient of model_4 over second coefficient of model_5, i.e.
# CookFavorability_117 (117th model) / CookFavorability_116 (116th model)
coef(model_4)[3] / coef(model_5)[2]

# Create table of results.
# The add_rows data frame has one column per model, in table order; the
# columns are now named model_1..model_5 instead of the previous duplicated
# and mislabelled names (the cell values and their order are unchanged).
# `notes` is spelled out in full rather than relying on partial matching.
modelsummary(list("Change" = model_1,
                  "Change" = model_2,
                  "Change" = model_3,
                  "117th" = model_4,
                  "116th" = model_5),
             fmt = 4,
             stars = c('+' = .10,
                       '*' = .05,
                       '**' = .01),
             statistic = "({std.error})",
             estimate  = "{estimate}{stars}",
             coef_map = c("CookFavorability_Change" = "District Partisanship Change",
                          "CookFavorability_116" = "2020 District Partisanship",
                          "CookFavorability_117" = "2022 District Partisanship",
                          "PartyR" = "Republican"),
             add_rows = data.frame(
               term = c("Sample"),
               model_1 = c("All Members"),
               model_2 = c("Democrats"),
               model_3 = c("Republicans"),
               model_4 = c("All Members"),
               model_5 = c("All Members")
             ),
             gof_map = c("nobs", "adj.r.squared"),
             notes = "Robust standard errors shown in parentheses. +p<0.10, *p<0.05; **p<0.01")

####################
# Table J.1.      #
####################
# Estimate model: perceived extremism on tweet extremism with respondent
# controls (copartisanship, gender, age, race, family income) and ICPSR and
# Year fixed effects.
# NOTE(review): no vcov is specified, so fixest's default for a model with
# fixed effects applies - presumably clustering on ICPSR, as the table note
# states; confirm.
model_cces <- feols(PerceivedExtreme ~ TweetExtremism + Copartisan + factor(Gender) + Age
                       + factor(Race) + factor(FamilyIncome) | ICPSR + Year,
                    data = cces_df)

# Create table of results (only the TweetExtremism coefficient is shown; the
# added rows flag the controls and fixed effects as included).
# The star thresholds now include '+' = .10 so that the markers actually used
# agree with the legend printed in the note and with the other tables.
# `notes` is spelled out in full rather than relying on partial matching.
modelsummary(list(model_cces),
             fmt = 3,
             stars = c('+' = .10,
                       '*' = .05,
                       '**' = .01),
             statistic = "({std.error})",
             estimate  = "{estimate}{stars}",
             coef_map = c("TweetExtremism" = "Tweet Extremism"),
             add_rows = data.frame(
               term = c("Controls", "Legislator Fixed Effects", "Year Fixed Effects"),
               model_1 = c("Y", "Y", "Y")),
             gof_map = c("nobs", "adj.r.squared"),
             notes = "Legislator-clustered standard errors shown in parentheses. +p<0.10, *p<0.05; **p<0.01")